BAYESIAN CLASSIFIER WITH SINGLE GAUSSIAN PER CLASS - MNIST DATA
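
The approach: project each 784-pixel image into a 9-dimensional PCA or LDA space, fit a single Gaussian $\mathcal{N}(\mu_i, \Sigma_i)$ per digit class $i$, and classify a projected input $x$ with the Bayes rule

$$\hat{y}(x) = \arg\max_i \; \ln p(x \mid i) + \ln P(i).$$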


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', 800)

%matplotlib inline

Loading the CSVs for each digit into a single DataFrame.

In [4]:
train = pd.DataFrame(columns=['V'+str(i) for i in range(1,785)]+['label'])
for i in range(10):
    temp = pd.read_csv(f"MNIST/train{i}.csv")
    temp = temp.iloc[:,1:]  # drop the leading index column
    temp['label'] = i       # tag each file's rows with its digit class
    train = pd.concat([train,temp])
    print(i,end=" ")
display(train.shape)
train.head()
0 1 2 3 4 5 6 7 8 9 
(36433, 785)
Out[4]:
[train.head() output: 5 rows × 785 columns — pixel intensities V1–V784 (grayscale values 0–255, mostly zero) plus the label column.]

Drop rows containing NA values.


In [5]:
train.dropna(inplace=True)
train.reset_index(inplace=True,drop=True)
train.shape
Out[5]:
(36051, 785)
In [6]:
train = train.apply(pd.to_numeric) # concat leaves columns as object dtype; convert back to numeric

Drop columns that are all zeros

In [7]:
non_zero_cols = list(filter(lambda x: train[x].sum() > 0,train.columns)) # keep columns with at least one non-zero value
In [9]:
train = train[non_zero_cols]
train.shape
Out[9]:
(36051, 709)

Drop columns where pairwise correlation > 0.97, as near-duplicate columns create (nearly) singular covariance matrices and break the LDA calculation.
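
For intuition, here is a tiny standalone sketch (synthetic data, not one of this notebook's cells) of the failure mode: two perfectly correlated features make the covariance matrix singular, so it has no inverse.

import numpy as np
rng = np.random.default_rng(0)
a = rng.normal(size=100)
b = a                              # b is perfectly correlated with a
c = rng.normal(size=100)
S = np.cov(np.vstack([a, b, c]))   # 3x3 covariance; rows are variables
print(np.linalg.det(S))            # ~0: S is singular and cannot be inverted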

In [10]:
corr_matrix = train.corr().abs()  # absolute pairwise correlations
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))  # keep the upper triangle, diagonal excluded
to_drop = [column for column in upper.columns if any(upper[column] > 0.97)]  # columns highly correlated with an earlier column
In [11]:
train.drop(columns=to_drop,inplace=True)
train.reset_index(inplace=True,drop=True)
In [12]:
train.shape
Out[12]:
(36051, 702)
In [13]:
from sklearn.decomposition import PCA
In [71]:
pca = PCA(n_components=9)
xPCA = pca.fit_transform(train.iloc[:,:-1])  # fit on the training pixels and project in one step
xPCA.shape
Out[71]:
(36051, 9)
In [15]:
cm = plt.get_cmap('gist_rainbow')
colors = [cm(i/9) for i in range(10)]  # 10 evenly spaced colors, one per digit

Plotting the digits against each pair of the 9 components of PCA.

In [16]:
fig, axes = plt.subplots(nrows=6,ncols=6,figsize=(20,20))
flats = axes.flatten()
counter = 0
for i in range(8):
    for j in range(i+1,9):  # all 36 unordered pairs of the 9 components
        tempDF = pd.DataFrame({'label':train['label'],
                               'first':xPCA[:,i],
                               'second':xPCA[:,j]})
        for lab in tempDF.label.unique():
            temp = tempDF[tempDF['label'] == lab]
            flats[counter].scatter(temp['first'],temp['second'],c=colors[lab],label=lab,s=0.5,alpha=1)
        counter += 1

handles, labels = flats[2].get_legend_handles_labels()
fig.legend(handles, labels, ncol=10, loc='upper center', bbox_to_anchor=(0.5, 1.02),
           prop={'size':16}, markerscale=30, framealpha=1)

fig.tight_layout()

As expected, PCA finds the axes that capture the most variation; it doesn't necessarily find axes that separate the classes, as is evident from the plots above.
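
As a quick check (a sketch using the fitted pca object from above), the fraction of total pixel variance retained by the 9 components can be inspected:

print(pca.explained_variance_ratio_)        # per-component fraction of variance
print(pca.explained_variance_ratio_.sum())  # total fraction captured by the 9 components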

In [17]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
In [72]:
clf = LinearDiscriminantAnalysis(solver='eigen',n_components=9) # LDA yields at most n_classes-1 = 9 components
xLDA = clf.fit(train.iloc[:,:-1],train['label']).transform(train.iloc[:,:-1])
In [19]:
xLDA.shape
Out[19]:
(36051, 9)

Plotting the digits against each pair of the 9 LDA components. Here we expect to see the classes separated out.

In [20]:
fig, axes = plt.subplots(nrows=6,ncols=6,figsize=(20,20))
flats = axes.flatten()
counter = 0
for i in range(8):
    for j in range(i+1,9):  # all 36 unordered pairs of the 9 components
        tempDF = pd.DataFrame({'label':train['label'],
                               'first':xLDA[:,i],
                               'second':xLDA[:,j]})
        for k,lab in enumerate(tempDF.label.unique()):
            temp = tempDF[tempDF['label'] == lab]
            flats[counter].scatter(temp['first'],temp['second'],c=colors[k],label=lab,s=0.5,alpha=1,edgecolor=None)
        counter += 1

handles, labels = flats[2].get_legend_handles_labels()
fig.legend(handles, labels, ncol=10, loc='upper center', bbox_to_anchor=(0.5, 1.02),
           prop={'size':16}, markerscale=30, framealpha=1)

fig.tight_layout()

Comparison of how the first two components of LDA and PCA separate digits 1 and 8.

In [21]:
fig, axes = plt.subplots(ncols=2,figsize=(12,6))

tempDF = pd.DataFrame({'label':train['label'],
                       'firstLDA':xLDA[:,0],
                       'secondLDA':xLDA[:,1],
                       'firstPCA':xPCA[:,0],
                       'secondPCA':xPCA[:,1]})
for lab in [1,8]:
    temp = tempDF[tempDF['label'] == lab]
    axes[0].scatter(temp['firstLDA'],temp['secondLDA'],c=colors[lab],label=lab,s=50,marker=f'${lab}$',alpha=0.3,edgecolor=None)
axes[0].set_title("LDA Projection of 1 and 8")
for lab in [1,8]:
    temp = tempDF[tempDF['label'] == lab]
    axes[1].scatter(temp['firstPCA'],temp['secondPCA'],c=colors[lab],label=lab,s=50,marker=f'${lab}$',alpha=0.3,edgecolor=None)
axes[1].set_title("PCA Projection of 1 and 8")

fig.tight_layout()
In [22]:
xPCA.shape
Out[22]:
(36051, 9)

Let's generate the Gaussian parameters for each class and build the Discriminant function.


The function below returns three lists of length 10, one entry per digit class: the class `mean` (a vector of length 9, one component per PCA/LDA dimension), the 9×9 `variance-covariance` matrix, and its `inverse`. These per-class parameters (mu, sigma, sigma-inv) are used to compute the discriminant function later.

In [34]:
def get_mu_sigmas_inv(df,label_df):
    muList = []
    sigmaList = []
    invSigmaList = []

    for i in range(10): # one Gaussian per digit class 0-9
        ixs = label_df[label_df.label == i].index

        mu = df[ixs].mean(axis=0) # class mean: a length-9 vector, one entry per PCA/LDA dimension
        muList.append(mu)

        bigSigma = np.cov(df[ixs].T) # 9x9 variance-covariance matrix of the 9 PCA/LDA dims
        sigmaList.append(bigSigma)

        invSigmaList.append(np.linalg.inv(bigSigma)) # its inverse, also 9x9
    return muList,sigmaList,invSigmaList
In [35]:
muListPCA, sigmaListPCA, invSigmaListPCA = get_mu_sigmas_inv(xPCA,train)

Get class priors.

In [24]:
priors = train.groupby(['label'])['label'].count()/train.shape[0] # empirical class frequencies P(class i)

The discriminant function below gives the log joint probability, log p(x, class i), up to an additive constant, for a given input vector x and class i.
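
Concretely, with class mean $\mu_i$, covariance $\Sigma_i$ and prior $P(i)$, the code below computes

$$g_i(x) = -\tfrac{1}{2}(x-\mu_i)^{\top}\Sigma_i^{-1}(x-\mu_i) - \tfrac{1}{2}\ln\lvert\Sigma_i\rvert + \ln P(i),$$

i.e. the Gaussian log-density plus the log prior, with the class-independent constant $-\tfrac{d}{2}\ln 2\pi$ dropped since it does not affect the argmax.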

In [25]:
def calc_discriminant(i,row,muList,sigmaList,invSigmaList,priors):
    return (-0.5*(row-muList[i]).dot(invSigmaList[i]).dot(row-muList[i]) # Mahalanobis distance term
            - 0.5*np.log(np.linalg.det(sigmaList[i]))                    # log-determinant term
            + np.log(priors[i]))                                         # log prior

This function predicts the label for each row of the given input data by taking the argmax over the 10 class discriminants.

In [36]:
def predict(X,muList,sigmaList,invSigmaList,priors):
    train_predictions = []
    for i,row in enumerate(X):
        temp = list(map(lambda x: calc_discriminant(x,row,muList,sigmaList,invSigmaList,priors),list(range(10))))
        train_predictions.append(np.argmax(temp)) # choose the class with the highest discriminant
        if i%1000 == 0:
            print(i,end=" ") # progress indicator
    return train_predictions
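
As an aside, the same computation can be vectorized (a sketch, not one of the original cells; it assumes the lists built above): np.einsum evaluates the Mahalanobis term of every row against one class at once, avoiding the Python-level loop over rows.

def predict_vectorized(X, muList, sigmaList, invSigmaList, priors):
    # scores[n, i] holds the discriminant of row n under class i
    scores = np.empty((X.shape[0], 10))
    for i in range(10):
        d = X - muList[i]                                       # per-row deviation from the class mean
        maha = np.einsum('nj,jk,nk->n', d, invSigmaList[i], d)  # row-wise Mahalanobis terms
        scores[:, i] = -0.5*maha - 0.5*np.log(np.linalg.det(sigmaList[i])) + np.log(priors[i])
    return scores.argmax(axis=1)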
In [37]:
from sklearn.metrics import accuracy_score
In [39]:
pred_pca_full_train = np.array(predict(xPCA,muListPCA,sigmaListPCA,invSigmaListPCA,priors))
pca_full_train_accuracy = accuracy_score(pred_pca_full_train,train['label'])
pca_full_train_accuracy
0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000 21000 22000 23000 24000 25000 26000 27000 28000 29000 30000 31000 32000 33000 34000 35000 36000 
Out[39]:
0.8725139385870018

Convert each full PCA variance-covariance matrix to a diagonal matrix (zeroing the off-diagonal entries) and invert the new matrices.

In [44]:
sigmaListPCADiag = np.array(list(map(lambda x: np.diag(np.diag(x)),sigmaListPCA)))     # keep only the diagonal
invSigmaListPCADiag = np.array(list(map(lambda x: np.linalg.inv(x),sigmaListPCADiag))) # invert the diagonal matrices
In [45]:
pred_pca_diag_train = np.array(predict(xPCA,muListPCA,sigmaListPCADiag,invSigmaListPCADiag,priors))
pca_diag_train_accuracy = accuracy_score(pred_pca_diag_train,train['label'])
pca_diag_train_accuracy
0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000 21000 22000 23000 24000 25000 26000 27000 28000 29000 30000 31000 32000 33000 34000 35000 36000 
Out[45]:
0.7595073645668636
In [46]:
muListLDA, sigmaListLDA, invSigmaListLDA = get_mu_sigmas_inv(xLDA,train)
In [47]:
pred_lda_full_train = np.array(predict(xLDA,muListLDA,sigmaListLDA,invSigmaListLDA,priors))
lda_full_train_accuracy = accuracy_score(pred_lda_full_train,train['label'])
lda_full_train_accuracy
0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000 21000 22000 23000 24000 25000 26000 27000 28000 29000 30000 31000 32000 33000 34000 35000 36000 
Out[47]:
0.8942886466394829

Convert each full LDA variance-covariance matrix to a diagonal matrix (zeroing the off-diagonal entries) and invert the new matrices.

In [48]:
sigmaListLDADiag = np.array(list(map(lambda x: np.diag(np.diag(x)),sigmaListLDA)))
invSigmaListLDADiag = np.array(list(map(lambda x: np.linalg.inv(x),sigmaListLDADiag)))
In [49]:
pred_lda_diag_train = np.array(predict(xLDA,muListLDA,sigmaListLDADiag,invSigmaListLDADiag,priors))
lda_diag_train_accuracy = accuracy_score(pred_lda_diag_train,train['label'])
lda_diag_train_accuracy
0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000 21000 22000 23000 24000 25000 26000 27000 28000 29000 30000 31000 32000 33000 34000 35000 36000 
Out[49]:
0.8819172838478821

Load the test data and make predictions.

In [73]:
test = pd.DataFrame(columns=['V'+str(i) for i in range(1,785)]+['label'])
for i in range(10):
    temp = pd.read_csv(f"MNIST/test{i}.csv")
    temp = temp.iloc[:,1:]
    temp['label'] = i
    test = pd.concat([test,temp])
    print(i,end=" ")
display(test.shape)
test.head()
0 1 2 3 4 5 6 7 8 9 
(24227, 785)
Out[73]:
[test.head() output: 5 rows × 785 columns — pixel intensities V1–V784 (grayscale values 0–255, mostly zero) plus the label column.]

Use only those columns from the test data that are present in the training data.

In [74]:
test = test.apply(pd.to_numeric)
test = test[non_zero_cols]
test.drop(columns=to_drop,inplace=True)
test.reset_index(inplace=True,drop=True)
In [75]:
test.fillna(value=0,inplace=True) # unlike train, fill missing values with 0 so every test row keeps a prediction
test.shape
Out[75]:
(24227, 702)

Transform the test data into the PCA and LDA spaces using the transformers already fitted on the training data.

In [76]:
testPCA = pca.transform(test.iloc[:,:-1])
testLDA = clf.transform(test.iloc[:,:-1])

Get predictions for the test data using PCA dimensions and the full variance-covariance matrices.

In [78]:
pred_pca_full_test = np.array(predict(testPCA,muListPCA,sigmaListPCA,invSigmaListPCA,priors))
pca_full_test_accuracy = accuracy_score(pred_pca_full_test,test['label'])
pca_full_test_accuracy
0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000 21000 22000 23000 24000 
Out[78]:
0.8619309035373757

Get predictions for the test data using PCA dimensions and the diagonal covariance matrices.

In [79]:
pred_pca_diag_test = np.array(predict(testPCA,muListPCA,sigmaListPCADiag,invSigmaListPCADiag,priors))
pca_diag_test_accuracy = accuracy_score(pred_pca_diag_test,test['label'])
pca_diag_test_accuracy
0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000 21000 22000 23000 24000 
Out[79]:
0.7609691666322698

Get predictions for the test data using LDA dimensions and the full variance-covariance matrices.

In [80]:
pred_lda_full_test = np.array(predict(testLDA,muListLDA,sigmaListLDA,invSigmaListLDA,priors))
lda_full_test_accuracy = accuracy_score(pred_lda_full_test,test['label'])
lda_full_test_accuracy
0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000 21000 22000 23000 24000 
Out[80]:
0.887893672349032

Get predictions for the test data using LDA dimensions and the diagonal covariance matrices.

In [81]:
pred_lda_diag_test = np.array(predict(testLDA,muListLDA,sigmaListLDADiag,invSigmaListLDADiag,priors))
lda_diag_test_accuracy = accuracy_score(pred_lda_diag_test,test['label'])
lda_diag_test_accuracy
0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000 21000 22000 23000 24000 
Out[81]:
0.8750154785982581

We note that the test accuracies rank as follows:

  • LDA, full variance-covariance matrix - 0.887893672349032
  • LDA, diagonal variance-covariance matrix - 0.8750154785982581
  • PCA, full variance-covariance matrix - 0.8619309035373757
  • PCA, diagonal variance-covariance matrix - 0.7609691666322698

This is expected: PCA dimensions do not necessarily separate the classes, while LDA is explicitly constructed to do so. Likewise, models with diagonal variance-covariance matrices perform worse than those with full matrices, since discarding the off-diagonal terms loses information and constrains each class's Gaussian contours to axis-aligned ellipses rather than freely oriented ones.
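
To visualize the last point, here is a small sketch (assuming xLDA and train from above, plus scipy) that overlays the full-covariance and diagonal-covariance Gaussian contours for a single class, digit 8, in the first two LDA dimensions; the diagonal fit is forced into axis-aligned ellipses.

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal

mask = (train['label'] == 8).values          # rows belonging to digit 8
pts = xLDA[mask][:, :2]                      # their first two LDA coordinates
mu = pts.mean(axis=0)
full = np.cov(pts.T)                         # full 2x2 covariance
diag = np.diag(np.diag(full))                # diagonal-only version

xs = np.linspace(pts[:,0].min(), pts[:,0].max(), 200)
ys = np.linspace(pts[:,1].min(), pts[:,1].max(), 200)
XX, YY = np.meshgrid(xs, ys)
grid = np.dstack([XX, YY])                   # (200, 200, 2) evaluation grid

fig, ax = plt.subplots(figsize=(6,6))
ax.scatter(pts[:,0], pts[:,1], s=0.5, alpha=0.3, c='gray')
ax.contour(XX, YY, multivariate_normal(mu, full).pdf(grid))   # freely oriented ellipses
ax.contour(XX, YY, multivariate_normal(mu, diag).pdf(grid))   # axis-aligned ellipses
ax.set_title("Full vs diagonal Gaussian fit, digit 8, first two LDA dims")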